# Stage 12: learn a k-means model on previously extracted HuBERT features.
#
# Reads features from ${feat_dir}/${train_set} and writes the model to
# ${km_dir}/model.mdl by calling tools/learn_kmeans.py. The script aborts with
# a non-zero status if the output directory cannot be created or if the
# clustering tool fails, so a failed run is never reported as "Done".

# Root directory holding extracted features and k-means outputs.
exp_dir=/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/hubert_feat/ximalaya_redian_2T
# Manifest directory for this dataset (not referenced by this stage).
data_dir=/home/node27_tmpdata/xlgeng/pachong_10W_data/fairseq_data/manifest/ximalaya_redian_2T
exp_name=hubert_large_by_xlgeng
extract_layer=9            # layer whose hidden features were extracted
# Feature tag combining the experiment name and the extracted layer index.
feat="${exp_name}_extract_layer_${extract_layer}"
feat_dir="${exp_dir}/feature/${feat}"
# Passed as the third positional argument of learn_kmeans.py
# (presumably the number of feature shards/jobs — confirm against the tool).
nj=40

# Number of clusters requested from learn_kmeans.py.
n_cluster=500
km_dir="${exp_dir}/k-means/${feat}"
# Without the output directory the model cannot be saved; stop early.
mkdir -p "${km_dir}" || exit 1
train_set="train"
echo "wo shi gengxuelong "
echo "stage 12: K-means clustering on Hubert (iter2) featues started @ $(date)"
# Abort on failure instead of falling through to the "Done" message.
python tools/learn_kmeans.py \
    "${feat_dir}/${train_set}" "${train_set}" "${nj}" \
    "${km_dir}/model.mdl" "${n_cluster}" \
    --percent 0.1 --batch_size 50000 \
    || { echo "stage 12: learn_kmeans.py failed" >&2; exit 1; }
echo "stage 12: Done @ $(date)"
